In [2]:
import pandas as pd
import numpy as np
In [9]:
metro_df = pd.read_csv("/scratch/zwang/tmp/redfin_metro_market_tracker.tsv000", delimiter='\t')
In [10]:
metro_df.describe()
Out[10]:
period_duration region_type_id table_id city state property_type_id median_sale_price median_sale_price_mom median_sale_price_yoy median_list_price ... sold_above_list sold_above_list_mom sold_above_list_yoy price_drops price_drops_mom price_drops_yoy off_market_in_two_weeks off_market_in_two_weeks_mom off_market_in_two_weeks_yoy parent_metro_region_metro_code
count 243702.0 243702.0 243702.00000 0.0 0.0 243702.000000 2.432700e+05 224290.000000 218023.000000 2.275400e+05 ... 243087.000000 224093.000000 217774.000000 142085.000000 132403.000000 116606.000000 220545.000000 203646.000000 196641.000000 243702.00000
mean 30.0 -2.0 30188.91170 NaN NaN 4.137459 2.095753e+05 1.732001 2.162750 2.288758e+05 ... 0.182319 0.002564 0.027859 0.193195 0.001587 0.003566 0.265338 0.002672 0.031407 30188.91170
std 0.0 0.0 11607.33619 NaN NaN 4.271163 1.901909e+05 499.981736 589.808332 2.078979e+05 ... 0.187989 0.171851 0.178994 0.114948 0.084396 0.095875 0.242623 0.194101 0.207323 11607.33619
min 30.0 -2.0 10140.00000 NaN NaN -1.000000 1.000000e+00 -0.999996 -0.999994 1.230000e+02 ... 0.000000 -1.000000 -1.000000 0.000043 -0.933333 -0.964286 0.000000 -1.000000 -1.000000 10140.00000
25% 30.0 -2.0 19740.00000 NaN NaN -1.000000 1.190000e+05 -0.075556 -0.021675 1.299000e+05 ... 0.029851 -0.033640 -0.020833 0.125000 -0.031904 -0.035371 0.043478 -0.050000 -0.034799 19740.00000
50% 30.0 -2.0 30780.00000 NaN NaN 4.000000 1.650000e+05 0.006535 0.077578 1.795000e+05 ... 0.142857 0.000000 0.007125 0.178404 0.001507 0.001375 0.229167 0.000000 0.006536 30780.00000
75% 30.0 -2.0 40420.00000 NaN NaN 6.000000 2.417348e+05 0.096972 0.200734 2.600000e+05 ... 0.253383 0.039843 0.076923 0.239316 0.033931 0.038283 0.418919 0.055556 0.099883 40420.00000
max 30.0 -2.0 49780.00000 NaN NaN 13.000000 1.700000e+07 189999.000000 220999.000000 2.890000e+07 ... 1.000000 1.000000 1.000000 1.000000 0.925926 0.952381 1.000000 1.000000 1.000000 49780.00000

8 rows × 49 columns

In [69]:
# metro_df.info()
In [52]:
ny_df = metro_df[metro_df["region"] == 'New York, NY metro area'].sort_values(by='last_updated')
In [53]:
ny_df["period_begin"] = pd.to_datetime(ny_df["period_begin"])
In [54]:
ny_df = ny_df.sort_values(by="period_begin")
condo_df = ny_df[ny_df["property_type"] == "Condo/Co-op"]
In [62]:
import plotly.express as px

fig = px.line(ny_df, x="period_begin", y="median_sale_price", title='New York Metro Area median sale price by property type', color="property_type")
fig.show()
In [63]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

condo_df[condo_df["period_begin"] == "Jun 1, 2015"]
Out[63]:
period_begin period_end period_duration region_type region_type_id table_id is_seasonally_adjusted region city state state_code property_type property_type_id median_sale_price median_sale_price_mom median_sale_price_yoy median_list_price median_list_price_mom median_list_price_yoy median_ppsf median_ppsf_mom median_ppsf_yoy median_list_ppsf median_list_ppsf_mom median_list_ppsf_yoy homes_sold homes_sold_mom homes_sold_yoy pending_sales pending_sales_mom pending_sales_yoy new_listings new_listings_mom new_listings_yoy inventory inventory_mom inventory_yoy months_of_supply months_of_supply_mom months_of_supply_yoy median_dom median_dom_mom median_dom_yoy avg_sale_to_list avg_sale_to_list_mom avg_sale_to_list_yoy sold_above_list sold_above_list_mom sold_above_list_yoy price_drops price_drops_mom price_drops_yoy off_market_in_two_weeks off_market_in_two_weeks_mom off_market_in_two_weeks_yoy parent_metro_region parent_metro_region_metro_code last_updated
140231 2015-06-01 2015-06-30 30 metro -2 35614 f New York, NY metro area NaN NaN NY Condo/Co-op 3 475790.0 0.09377 NaN 489950.0 0.031474 NaN 532.374101 0.06219 NaN 510.204082 -0.021088 NaN 2392.0 0.296477 NaN 1452.0 0.247423 NaN 3404.0 -0.090812 NaN 18170.0 -0.003674 NaN 7.6 -2.3 NaN 110.0 -2.0 NaN 0.982339 0.000358 NaN 0.099498 0.001937 NaN 0.096037 0.013568 NaN 0.092975 -0.040186 NaN New York, NY 35614 2022-04-10 14:38:16
In [183]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=condo_df["period_begin"], y=condo_df["median_ppsf"], name="median ppsf"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=condo_df["period_begin"], y=condo_df["homes_sold"], name="number of homes sold"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="New York Metro Area condo median ppsf and homes sold"
)

# Set x-axis title
fig.update_xaxes(title_text="time")

# Set y-axes titles
fig.update_yaxes(title_text="median ppsf", secondary_y=False)
fig.update_yaxes(title_text="number of homes sold", secondary_y=True)

fig.show()
In [71]:
neighborhood_df = pd.read_csv("/scratch/zwang/tmp/neighborhood_market_tracker.tsv000", delimiter='\t')
In [88]:
nynj_df = neighborhood_df[(neighborhood_df['region'].str.contains("New York, NY")) | (neighborhood_df['region'].str.contains("Jersey City, NJ"))]
In [93]:
hudson_df = neighborhood_df[neighborhood_df['region'] == 'Jersey City, NJ - Hudson Exchange']
In [108]:
pd.set_option('display.max_rows', None)

# nynj_df[["median_ppsf", "region"]].groupby('region').count().sort_values("median_ppsf")
nynj_df.head()
Out[108]:
period_begin period_end period_duration region_type region_type_id table_id is_seasonally_adjusted region city state state_code property_type property_type_id median_sale_price median_sale_price_mom median_sale_price_yoy median_list_price median_list_price_mom median_list_price_yoy median_ppsf median_ppsf_mom median_ppsf_yoy median_list_ppsf median_list_ppsf_mom median_list_ppsf_yoy homes_sold homes_sold_mom homes_sold_yoy pending_sales pending_sales_mom pending_sales_yoy new_listings new_listings_mom new_listings_yoy inventory inventory_mom inventory_yoy months_of_supply months_of_supply_mom months_of_supply_yoy median_dom median_dom_mom median_dom_yoy avg_sale_to_list avg_sale_to_list_mom avg_sale_to_list_yoy sold_above_list sold_above_list_mom sold_above_list_yoy price_drops price_drops_mom price_drops_yoy off_market_in_two_weeks off_market_in_two_weeks_mom off_market_in_two_weeks_yoy parent_metro_region parent_metro_region_metro_code last_updated
40 2016-08-01 2016-10-31 90 neighborhood 1 79695 f New York, NY - Travis-Chelsea New York New York NY Townhouse 13 552500.0 -0.047414 0.284884 699900.0 NaN 0.102206 228.982914 -0.002539 0.389459 322.371032 NaN 0.505828 2.0 -0.333333 -0.333333 1.0 NaN NaN 3.0 NaN 0.50000 2.0 NaN -0.333333 NaN NaN NaN 112.0 21.0 57.0 0.993795 -0.002068 0.042616 0.500000 0.166667 0.500000 NaN NaN NaN 1.000000 NaN NaN New York, NY 35614 2022-04-10 14:38:16
421 2015-07-01 2015-09-30 90 neighborhood 1 92373 f New York, NY - East New York New York New York NY Single Family Residential 6 330000.0 -0.004525 NaN 379000.0 -0.081212 NaN 286.458333 0.158708 NaN 254.975124 -0.123106 NaN 5.0 -0.166667 NaN 1.0 0.000000 NaN 13.0 -0.071429 NaN 23.0 0.15000 NaN NaN NaN NaN 63.0 -80.5 NaN 0.971365 -0.037389 NaN 0.200000 -0.133333 NaN NaN NaN NaN 1.000000 1.000000 NaN New York, NY 35614 2022-04-10 14:38:16
458 2015-10-01 2015-12-31 90 neighborhood 1 32311 f New York, NY - College Point New York New York NY Multi-Family (2-4 Unit) 4 735000.0 0.020833 NaN 853500.0 -0.024571 NaN 362.963035 -0.033157 NaN 354.155361 -0.029374 NaN 21.0 0.000000 NaN 3.0 -0.571429 NaN 26.0 0.130435 NaN 22.0 0.10000 NaN NaN NaN NaN 37.0 10.0 NaN 0.959129 -0.013225 NaN 0.142857 0.000000 NaN NaN NaN NaN 0.000000 -0.285714 NaN New York, NY 35614 2022-04-10 14:38:16
472 2017-02-01 2017-04-30 90 neighborhood 1 79695 f New York, NY - Travis-Chelsea New York New York NY Townhouse 13 637500.0 0.250000 0.333487 549000.0 -0.155255 -0.036842 316.220238 1.247024 0.573198 348.189415 0.080089 0.516731 2.0 1.000000 0.000000 1.0 NaN NaN 3.0 2.000000 0.00000 1.0 NaN -0.500000 NaN NaN NaN 87.0 67.0 -2841.5 0.980920 0.342622 0.004429 0.000000 0.000000 0.000000 NaN NaN NaN 1.000000 NaN NaN New York, NY 35614 2022-04-10 14:38:16
731 2020-11-01 2021-01-31 90 neighborhood 1 43188 f New York, NY - Northeastern Queens New York New York NY Single Family Residential 6 840000.0 0.005988 0.005988 919000.0 -0.003138 0.032069 526.116578 0.013843 0.079213 561.419975 0.009039 -0.021421 467.0 -0.012685 0.225722 70.0 -0.369369 0.22807 376.0 -0.189655 0.13253 494.0 -0.05364 -0.081784 NaN NaN NaN 53.0 -4.0 -35.0 0.954161 0.003086 0.005286 0.079229 0.005233 0.008363 NaN NaN NaN 0.171429 0.054311 0.048622 New York, NY 35614 2022-04-10 14:38:16
In [132]:
zipcode_df = pd.read_csv("/scratch/zwang/tmp/zip_code_market_tracker.tsv000", delimiter='\t')
zipcode_df["zipcode"] = zipcode_df["region"].str[-5:]
In [133]:
zipcode_nynj_df = zipcode_df[(zipcode_df['parent_metro_region'].str.contains("New York, NY"))]
zipcode_allres_df = zipcode_nynj_df[(zipcode_nynj_df["property_type"] == "All Residential")]
In [135]:
zipcode_allres_df.head()
Out[135]:
period_begin period_end period_duration region_type region_type_id table_id is_seasonally_adjusted region city state state_code property_type property_type_id median_sale_price median_sale_price_mom median_sale_price_yoy median_list_price median_list_price_mom median_list_price_yoy median_ppsf median_ppsf_mom median_ppsf_yoy median_list_ppsf median_list_ppsf_mom median_list_ppsf_yoy homes_sold homes_sold_mom homes_sold_yoy pending_sales pending_sales_mom pending_sales_yoy new_listings new_listings_mom new_listings_yoy inventory inventory_mom inventory_yoy months_of_supply months_of_supply_mom months_of_supply_yoy median_dom median_dom_mom median_dom_yoy avg_sale_to_list avg_sale_to_list_mom avg_sale_to_list_yoy sold_above_list sold_above_list_mom sold_above_list_yoy price_drops price_drops_mom price_drops_yoy off_market_in_two_weeks off_market_in_two_weeks_mom off_market_in_two_weeks_yoy parent_metro_region parent_metro_region_metro_code last_updated zipcode
188 2016-09-01 2016-11-30 90 zip code 2 3399 f Zip Code: 10280 NaN New York NY All Residential -1 800000.0 0.013942 0.228879 770000.0 -0.094118 -0.094118 1187.683075 -0.022071 0.283930 1173.494312 0.000000 0.039908 23.0 0.277778 0.045455 1.0 NaN 0.000000 26.0 0.040000 0.130435 51.0 -0.135593 0.062500 NaN NaN NaN 128.0 1.5 3.5 0.987247 -0.006180 -0.005739 0.043478 -0.012077 -0.001976 NaN NaN NaN 0.000000 NaN 0.000000 New York, NY 35614 2022-04-10 14:38:16 10280
251 2020-01-01 2020-03-31 90 zip code 2 3637 f Zip Code: 11106 NaN New York NY All Residential -1 582000.0 -0.221405 -0.090625 689000.0 -0.008633 -0.040056 604.347826 -0.177166 -0.357973 1043.283582 0.117477 0.331429 19.0 -0.269231 0.266667 5.0 0.250000 0.666667 55.0 0.279070 0.447368 75.0 0.209677 0.415094 NaN NaN NaN 97.0 -27.5 1.5 0.960271 -0.013728 -0.017741 0.000000 0.000000 0.000000 NaN NaN NaN 0.200000 -0.050000 -0.133333 New York, NY 35614 2022-04-10 14:38:16 11106
350 2021-06-01 2021-08-31 90 zip code 2 2531 f Zip Code: 07024 NaN New Jersey NJ All Residential -1 362000.0 0.057706 0.064706 330000.0 0.050955 -0.080780 333.333333 -0.003344 0.144304 364.324582 0.012293 0.064736 210.0 0.039604 1.100000 30.0 0.153846 0.500000 265.0 -0.043321 -0.131148 433.0 -0.052516 -0.039911 NaN NaN NaN 128.0 5.5 -25.5 0.966563 0.001500 0.017561 0.128571 -0.005092 0.058571 NaN NaN NaN 0.066667 -0.048718 -0.033333 New York, NY 35614 2022-04-10 14:38:16 07024
740 2020-01-01 2020-03-31 90 zip code 2 2658 f Zip Code: 07432 NaN New Jersey NJ All Residential -1 440000.0 0.035294 -0.027624 474500.0 0.078409 0.056793 255.518464 -0.096337 -0.060364 262.676462 -0.039398 -0.068374 12.0 -0.200000 -0.250000 1.0 -0.666667 -0.750000 24.0 0.411765 0.142857 24.0 0.263158 0.411765 NaN NaN NaN 105.5 12.5 44.5 1.002136 0.011914 0.023947 0.416667 0.216667 0.229167 NaN NaN NaN 0.000000 -0.333333 0.000000 New York, NY 35614 2022-04-10 14:38:16 07432
751 2021-05-01 2021-07-31 90 zip code 2 2695 f Zip Code: 07512 NaN New Jersey NJ All Residential -1 508000.0 0.000000 0.239024 472450.0 -0.004215 0.061806 268.854749 -0.039646 0.098124 283.170737 0.009542 0.148932 36.0 0.058824 0.333333 15.0 1.142857 0.250000 52.0 0.130435 0.083333 33.0 -0.083333 -0.108108 NaN NaN NaN 41.5 7.5 -36.5 1.010654 -0.002012 -0.003013 0.444444 0.032680 -0.074074 NaN NaN NaN 0.066667 -0.076190 -0.516667 New York, NY 35614 2022-04-10 14:38:16 07512
In [160]:
import json
nycgeojson = {}
with open("/scratch/zwang/tmp/ny.geojson", "r") as rfile:
    nycgeojson = json.loads(rfile.read())

njgeojson = {}
with open("/scratch/zwang/tmp/nj.geojson", "r") as rfile:
    njgeojson = json.loads(rfile.read())
    for area in njgeojson["features"]:
        area["properties"]["postalCode"] = area["properties"]["GEOID10"]

nynj_geojson = nycgeojson
nynj_geojson["features"] += njgeojson["features"]
In [178]:
latest_df = zipcode_allres_df[zipcode_allres_df['period_begin'] == '2022-01-01']
In [181]:
fig = px.choropleth(
    latest_df,
    geojson=nynj_geojson,
    locations='zipcode',
    color='median_ppsf',
    color_continuous_scale="Reds",
    featureidkey='properties.postalCode',
    range_color=(500, 2500),
)
fig.update_geos(fitbounds="locations", visible=False)

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, title="NY / NJ median ppsf by zipcode 2022-01-01",)
fig.show()
In [ ]:
 
In [121]:
%bbin
Adding sitedir: /mnt/scooter/bbin/20220429.19.10.11/pyimports
In [184]:
import plotly
plotly.offline.init_notebook_mode()

sharenb()
Original notebook path: '/mnt/shapurefb04_scratcht/zwang/notebooks/redfin.ipynb'
Notebook as permanent ipynb: '/home/zwang/public_html/permanent_notebooks/20220501_redfin.ipynb'
Notebook as permanent html: https://web.hudson-trading.com/~zwang/permanent_notebooks/20220501_redfin.html
In [ ]: